# function to parse through each medicare df for just those in NYC
nyc_medicare <- function(medicare_df, zip_codes = nyc_zip_codes, zipBx = 104, zipBk = 112, zipMn = 100, zipSI = 103){
nyc_medicare <- medicare_df %>%
filter(`Provider Zip Code` %in% zip_codes) %>%
mutate(zip_3 = substr(`Provider Zip Code`, 1, 3),
borough = ifelse(zip_3 == zipBx, "Bronx",
ifelse(zip_3 == zipBk, "Brooklyn",
ifelse(zip_3 == zipMn, "Manhattan",
ifelse(zip_3 == zipSI, "Staten Island", "Queens")))),
system = ifelse(`Provider Name` %in% nyc_health_plus, "Health+",
ifelse(`Provider Name` %in% mount_sinai, "Mount Sinai",
ifelse(`Provider Name` %in% ny_pres, "NewYork-Presbyterian",
ifelse(`Provider Name` %in% northwell, "Northwell",
ifelse(`Provider Name` %in% nyu_langone, "NYU Langone",
ifelse(`Provider Name` %in% montefiore, "Montefiore",
ifelse(`Provider Name` %in% suny, "SUNY", "Others"))))))))
nyc_medicare
}
# returning only the NYC dataset
nyc_medicare_2017 <- nyc_medicare(medicare_part_b_2017)
nyc_medicare_2016 <- nyc_medicare(medicare_part_b_2016)
nyc_medicare_2015 <- nyc_medicare(medicare_part_b_2015)
nyc_medicare_2014 <- nyc_medicare(medicare_part_b_2014)
# tabling NYC hospitals
nyc_hospitals_borough <- nyc_medicare_2017 %>%
group_by(borough) %>%
summarise(number_hospitals = n_distinct(`Provider Name`))
datatable(nyc_hospitals_borough)
Okay so if we look at this nyc_hospitals_borough, we clearly see that there is a geographical imbalance in terms of the number of hospitals per borough. Queens is probably the most underserved borough in this regard since it is also the biggest borough by land area, right?
## Geocoding Hospitals and NYC
hospital_systems <- nyc_medicare_2017 %>%
select(`Provider Name`, system) %>%
unique()
## Avoid geocoding everytime I knit
# nyc_hospitals_address_geocode <- geocode(location = hospital_names, output = "latlon")
# write_csv(nyc_hospitals_address_geocode, "data/hospitals_lat_long.csv")
# nyc_geocode <- geocode("New York City")
# nyc_lat_long <- nyc_geocode %>%
# unlist() %>%
# unname()
## Load csv MAKE SURE
# nyc_hospitals_address_geocode <- read_csv(paste(path, "hospitals_lat_long.csv", sep = ""))
# nyc_hospitals_address <- cbind(hospital_systems, nyc_hospitals_address_geocode)
## Mount Sinai Hospital is always wrong
## Must change both files or it will mess up the conversion to ESRI shapefile
# nyc_hospitals_address[4,3:4] <- c(-73.953143, 40.7877519)
# nyc_hospitals_address_geocode[4,1:2] <- c(-73.953143, 40.7877519)
#write.csv(nyc_hospitals_address, paste(path,"nyc_hospitals_systems_lat_long.csv",sep = ""))
## using the sp functions
# epsg32118_nad83 <- CRS("+proj=lcc +lat_1=41.03333333333333 +lat_2=40.66666666666666 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
# nyc_hospitals_spdf <- SpatialPointsDataFrame(coords = nyc_hospitals_address_geocode, data = nyc_hospitals_address, proj4string = epsg32118_nad83)
# writeOGR(obj=nyc_hospitals_spdf, dsn = "nyc_hospitals", layer = "nyc_hospitals", driver = "ESRI Shapefile", overwrite_layer = T)
# calculating catchment area
nyc_hospital_thiessen$area <- st_area(nyc_hospital_thiessen)*3861.02
nyc_hospital_thiessen$area_sqmi <- substr(nyc_hospital_thiessen$area, 1,5)
catchment_area_df <- nyc_hospital_thiessen %>%
select(Hospital.N, System, area_sqmi) %>%
st_drop_geometry()
# borough_list <- nyc_medicare_2017 %>%
# select(`Provider Name`, borough) %>%
# unique()
# catchment_area <- substr(nyc_hospital_shp$area,1,5)
# catchment_area_df <- cbind(nyc_hospital_shp$hsptl_n,catchment_area) %>%
# as.data.frame() %>%
# arrange(desc(V1)) %>%
# inner_join(.,borough_list, by = c("V1" = "Provider Name"))
# colnames(catchment_area_df) <- c("Provider Name", "Catchment Area (sq.mi)","Borough")
# write.csv(catchment_area_df, "./data/nyc_hospitals_thiessen_polygons/hospitals_catchment_area.csv")
# grouping catchment area for export
catchment_area_df_grouped <- catchment_area_df %>%
filter(System != "Others") %>%
group_by(System) %>%
summarise(catchment_area_sqmi = sum(as.numeric(area_sqmi)))
catchment_area_df_grouped2 <- catchment_area_df %>%
filter(System == "Others") %>%
select(Hospital.N, area_sqmi)
colnames(catchment_area_df_grouped2) <- c("System", "catchment_area_sqmi")
catchment_area_df_grouped <- rbind(catchment_area_df_grouped, catchment_area_df_grouped2)
datatable(catchment_area_df)
Here we can see that the hospitals with the largest catchment areas are in Queens, SI, and the Bronx. Visualizing it in a map, we have the following:
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(data = nyc_hospital_thiessen, label = ~area_sqmi,
fillOpacity = 0.5, stroke = T, color = "#8da0cb", weight = 2) %>%
addCircleMarkers(data = nyc_all_hospitals, label = ~Hospital.N,
weight = 3, radius = 3, color = "#ffa500")
We also have another problem. How do you determine if a procedure is the most common? Do you sum up the number of procedures in a given year, and then rank? Or do you find the most common procedure at each hospital? There are 418 covered procedures in total and we will look at 104 of them, the top quartile basically.
procedures_count <- nyc_medicare_2017 %>%
group_by(`DRG Definition`) %>%
summarise(discharge_count = sum(`Total Discharges`)) %>%
mutate(DRG_num = substr(`DRG Definition`, 1, 3)) %>%
arrange(desc(discharge_count))
head(procedures_count, n = 5)
## # A tibble: 5 x 3
## `DRG Definition` discharge_count DRG_num
## <chr> <dbl> <chr>
## 1 871 - SEPTICEMIA OR SEVERE SEPSIS W/O MV >96 HOURS W ~ 11414 871
## 2 470 - MAJOR JOINT REPLACEMENT OR REATTACHMENT OF LOWE~ 8640 470
## 3 291 - HEART FAILURE & SHOCK W MCC 6420 291
## 4 392 - ESOPHAGITIS, GASTROENT & MISC DIGEST DISORDERS ~ 3094 392
## 5 312 - SYNCOPE & COLLAPSE 2965 312
So, if we look at the most common procedures by the total number of discharges, septicemia/sepsis treatments are leading the pack.
nyc_medicare_2017_q1 <- nyc_medicare_2017 %>%
filter(`DRG Definition` %in% unlist(procedures_count$`DRG Definition`)[1:104]) %>%
select(`Provider Name`, `DRG Definition`, `Total Discharges`:`Average Medicare Payments`, borough, system) %>%
mutate(DRG_num = as.character(substr(`DRG Definition`, 1, 3)))
nyc_medicare_2017_top40 <- nyc_medicare_2017_q1 %>%
filter(`DRG Definition` %in% unlist(procedures_count$`DRG Definition`)[1:40])
cost_boxplot <- ggplot(nyc_medicare_2017_top40, aes(DRG_num, `Average Total Payments`)) +
geom_boxplot() +
labs(title = "Box Plot of Average Total Payments by top 40 DRGs",
subtitle = "Each observation is the averal total payments attributed to each hospital",
x = "DRG Number",
y = "Average Total Payments")
cost_boxplot
nyc_medicare_2017_q1_variances <- nyc_medicare_2017_q1 %>%
group_by(DRG_num) %>%
summarise(Range_Total = max(`Average Total Payments`)-min(`Average Total Payments`),
Range_NonCovered = max(`Average Total Payments` - `Average Medicare Payments`)-min(`Average Total Payments` - `Average Medicare Payments`),
Range_PctNonCovered = 100*(max(round(((`Average Total Payments`- `Average Medicare Payments`)/`Average Total Payments`),4)) - min(round(((`Average Total Payments`- `Average Medicare Payments`)/`Average Total Payments`),4))),
n_procedures = sum(`Total Discharges`),
n_hospitals = n_distinct(`Provider Name`))
total_cost_range_num_hospital_scatter <- ggplot(nyc_medicare_2017_q1_variances, aes(n_hospitals, Range_Total, label = DRG_num)) +
geom_point() +
labs(title = "Scatterplot of Total Costs Ranges",
subtitle = "Range for each DRG by the number of providers",
x = "Number of Hospitals",
y = "Range ($)")
ggplotly(total_cost_range_num_hospital_scatter)
pct_noncovered_range_num_hospital_scatter <- ggplot(nyc_medicare_2017_q1_variances, aes(n_hospitals, Range_PctNonCovered, label = DRG_num)) +
geom_point() +
labs(title = "Scatterplot of Range of Uncovered Costs as Prop of Total Costs",
subtitle = "Range for each DRG by the number of providers",
x = "Number of Hospitals",
y = "Range (%)") +
geom_smooth(se = F, method = "lm", color = "#b3cde3")
ggplotly(pct_noncovered_range_num_hospital_scatter)
pct_noncovered_range_num_procedure_scatter <- ggplot(nyc_medicare_2017_q1_variances, aes(n_procedures, Range_PctNonCovered, label = DRG_num)) +
geom_point() +
labs(title = "Scatterplot of Range of Uncovered Costs as Prop of Total Costs",
subtitle = "Range for each DRG by the number of procedures performed in total",
x = "Number of Procedures Performed",
y = "Range (%)")
ggplotly(pct_noncovered_range_num_procedure_scatter)
I want to say that using the range of costs is not very meaningful.
Okay, so there is a huge variance between hospitals for each of the top forty most common procedures. Further, the boxplots suggest that the distributions of costs for each DRG is are non-Normal. The next question is: are there hospitals that are consistently above or below average?
nyc_medicare_2017_q1_total_payments_zscores <- nyc_medicare_2017_q1 %>%
group_by(DRG_num) %>%
mutate(mean_total_payment = mean(`Average Total Payments`),
sd_total_payment = sd(`Average Total Payments`)) %>%
ungroup() %>%
mutate(z_score = (`Average Total Payments`-mean_total_payment)/sd_total_payment)
hospital_total_payments_scatter <- ggplot(nyc_medicare_2017_q1_total_payments_zscores, aes(`Provider Name`, z_score, label = DRG_num)) +
geom_point(aes(col = system)) +
geom_hline(yintercept = 0) +
coord_flip() +
labs(title = "Average Total Payments Scatterplot",
subtitle = "No. of std dev for each common DRG by hospital",
y = "Standard Deviations",
x = "Hospital")
ggplotly(hospital_total_payments_scatter)
Scandal! All NYC Health-Plus, ie public hospitals, charge more than private ones, with the exception of Coney Island Hospital. More precisely, on average, NYC Health+ hospitals are paid above-average amounts for the procedures performed. Well, I assume that Medicare does not just cover every single part of the procedure. Some parts of the entire treatment process might not be billable to Medicare. So what if we look at the Average Covered Charges?
nyc_medicare_2017_q1_covered_charges_zscores <- nyc_medicare_2017_q1 %>%
group_by(DRG_num) %>%
mutate(mean_covered_charges = mean(`Average Covered Charges`),
sd_covered_charges = sd(`Average Covered Charges`)) %>%
ungroup() %>%
mutate(z_score = (`Average Covered Charges`-mean_covered_charges)/sd_covered_charges)
hospital_covered_charges_scatter <- ggplot(nyc_medicare_2017_q1_covered_charges_zscores, aes(`Provider Name`, z_score, label = DRG_num)) +
geom_point(aes(col = system)) +
geom_hline(yintercept = 0) +
coord_flip() +
labs(title = "Average Covered Charges Scatterplot",
subtitle = "No. of std dev for each common DRG by hospital",
y = "Standard Deviations",
x = "Hospital")
ggplotly(hospital_covered_charges_scatter)
Evidently, this picture is a bit more mixed. So the Average Covered Charges of NYC Health+ hospitals tend to be below average whereas private that of private hospital systems tend to be above average. But if you think about it, what we are really interested in is, how much do beneficiaries (and their insurance providers) have to pay on top of what Medicare pays out. In other words, we really want to look into the co-payments, deductibles, and other additional payments from third parties for coordination of benefits.Non Medicare Payments = Average Total Payments - Average Medicare Payments.
nyc_medicare_2017_q1_non_medicare_payments_zscores <- nyc_medicare_2017_q1 %>%
group_by(DRG_num) %>%
mutate(average_non_medicare_payments = `Average Total Payments` - `Average Medicare Payments`,
mean_non_medicare_charges = mean(average_non_medicare_payments),
sd_non_medicare_charges = sd(average_non_medicare_payments)) %>%
ungroup() %>%
mutate(z_score = (average_non_medicare_payments-mean_non_medicare_charges)/sd_non_medicare_charges)
hospital_non_medicare_payments_scatter <- ggplot(nyc_medicare_2017_q1_non_medicare_payments_zscores, aes(`Provider Name`, z_score, label = DRG_num)) +
geom_point(aes(col = system)) +
geom_hline(yintercept = 0) +
coord_flip() +
labs(title = "Average Non-Covered Charges Scatterplot",
subtitle = "No. of std dev for each common DRG by hospital",
y = "Standard Deviations",
x = "Hospital") +
facet_wrap(~ borough, nrow = 5, scales = "free_y")
ggplotly(hospital_non_medicare_payments_scatter)
Okay, so this presents a much more nuanced picture. Even though Total Payments might be high at NYC Health+ hospitals, the amount that is not covered by Medicare tends to be below average at NYC Health+ hospitals. Among boroughts, the hospitals in Manhattan tend to incur more non-Medicare covered charges.
top_n_drg <- function(medicare_df, n, yr, procedure_count = procedures_count){
procedures_DRG <- unlist(procedures_count$DRG_num[1:n])
target_df <- medicare_df %>%
mutate(DRG_num = as.character(substr(`DRG Definition`, 1, 3))) %>%
filter(DRG_num %in% procedures_DRG) %>%
select(`Provider Name`, DRG_num, `Total Discharges`:`Average Medicare Payments`, borough, system) %>%
mutate(Average_Non_Medicare_Payments = `Average Total Payments` - `Average Medicare Payments`,
Year = yr,
`Provider Name` = str_replace_all(`Provider Name`, c("LUTHERAN MEDICAL CENTER" = "NYU LANGONE HOSPITALS", "NEW YORK METHODIST HOSPITAL" = "NEW YORK-PRESBYTERIAN BROOKLYN METHODIST HOSPITAL", "FOREST HILLS HOSPITAL" = "LONG ISLAND JEWISH MEDICAL CENTER", "BETH ISRAEL MEDICAL CENTER" = "MOUNT SINAI BETH ISRAEL", "ST LUKE'S ROOSEVELT HOSPITAL" = "MOUNT SINAI WEST", "NEW YORK HOSPITAL MEDICAL CENTER OF QUEENS" = "NEW YORK-PRESBYTERIAN/QUEENS", "MOUNT SINAI BETH ISRAEL/PETRIE CAMPUS" = "MOUNT SINAI BETH ISRAEL", "NYU HOSPITALS CENTER" = "NYU LANGONE HOSPITALS"))) %>%
arrange(borough, `Provider Name`, desc(DRG_num))
target_df
}
nyc_medicare_2017_topfive <- top_n_drg(nyc_medicare_2017, 5, 2017)
nyc_medicare_2016_topfive <- top_n_drg(nyc_medicare_2016, 5, 2016)
nyc_medicare_2015_topfive <- top_n_drg(nyc_medicare_2015, 5, 2015)
nyc_medicare_2014_topfive <- top_n_drg(nyc_medicare_2014, 5, 2014)
nyc_medicare_2014_2017_topfive_df <- rbind(nyc_medicare_2017_topfive, nyc_medicare_2016_topfive, nyc_medicare_2015_topfive, nyc_medicare_2014_topfive)
#system mapping
updated_system_data <- read_csv("./data/nyc_hospitals/nyc_hospitals_systems_lat_long.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## `Provider Name` = col_character(),
## system = col_character(),
## lon = col_double(),
## lat = col_double()
## )
updated_system_data <- updated_system_data[,2:3]
nyc_medicare_2014_2017_topfive_df <- inner_join(nyc_medicare_2014_2017_topfive_df, updated_system_data, by = "Provider Name")
nyc_medicare_2014_2017_topfive_df <- nyc_medicare_2014_2017_topfive_df[,-8]
colnames(nyc_medicare_2014_2017_topfive_df)[10] <- "system"
nyc_medicare_2014_2017_topfive_df2 <- nyc_medicare_2014_2017_topfive_df %>%
mutate(Total_Payments = `Average Total Payments`*`Total Discharges`,
Total_Medicare_Payments = `Average Medicare Payments`*`Total Discharges`,
Total_Covered_Charges = `Average Covered Charges`*`Total Discharges`,
Total_Non_Medicare_Payments = Average_Non_Medicare_Payments*`Total Discharges`) %>%
group_by(`Provider Name`, DRG_num, Year) %>%
summarize(`Total Discharges` = ifelse(n()==2,sum(`Total Discharges`), `Total Discharges`),
`Average Total Payments` = sum(Total_Payments)/`Total Discharges`,
`Average Medicare Payments`= sum(Total_Medicare_Payments)/`Total Discharges`,
`Average Covered Charges` = sum(Total_Covered_Charges)/`Total Discharges`,
Average_Non_Medicare_Payments = sum(Total_Non_Medicare_Payments)/`Total Discharges`)
nyc_medicare_2014_2017_topfive_df_wider <- nyc_medicare_2014_2017_topfive_df2 %>%
pivot_wider(., names_from = Year, values_from = c(`Total Discharges`:Average_Non_Medicare_Payments))
nyc_medicare_2014_2017_topfive_df_wider <- nyc_medicare_2014_2017_topfive_df_wider %>%
mutate(Discharges_2015 = round((`Total Discharges_2015` - `Total Discharges_2014`)/`Total Discharges_2014`*100, 1),
Medicare_Payments_2015 = round((`Average Medicare Payments_2015` - `Average Medicare Payments_2014`)/`Average Medicare Payments_2014`*100, 1),
Total_Payments_2015 = round((`Average Total Payments_2015` - `Average Total Payments_2014`)/`Average Total Payments_2014`*100, 1),
Non_Medicare_Payments_2015 = round((`Average_Non_Medicare_Payments_2015` - `Average_Non_Medicare_Payments_2014`)/`Average_Non_Medicare_Payments_2014`*100, 1),
Discharges_2016 = round((`Total Discharges_2016` - `Total Discharges_2015`)/`Total Discharges_2015`*100, 1),
Medicare_Payments_2016 = round((`Average Medicare Payments_2016` - `Average Medicare Payments_2015`)/`Average Medicare Payments_2015`*100, 1),
Total_Payments_2016 = round((`Average Total Payments_2016` - `Average Total Payments_2015`)/`Average Total Payments_2015`*100, 1),
Non_Medicare_Payments_2016 = round((`Average_Non_Medicare_Payments_2016` - `Average_Non_Medicare_Payments_2015`)/`Average_Non_Medicare_Payments_2015`*100, 1),
Discharges_2017 = round((`Total Discharges_2017` - `Total Discharges_2016`)/`Total Discharges_2016`*100, 1),
Medicare_Payments_2017 = round((`Average Medicare Payments_2017` - `Average Medicare Payments_2016`)/`Average Medicare Payments_2016`*100, 1),
Total_Payments_2017 = round((`Average Total Payments_2017` - `Average Total Payments_2016`)/`Average Total Payments_2016`*100, 1),
Non_Medicare_Payments_2017 = round((`Average_Non_Medicare_Payments_2017` - `Average_Non_Medicare_Payments_2016`)/`Average_Non_Medicare_Payments_2016`*100, 1))
nyc_medicare_2014_2017_topfive_df_longer <- pivot_longer(nyc_medicare_2014_2017_topfive_df_wider[,c(1:2,23,27,31)], cols = starts_with("Discharges_"),
names_to = "Discharge_Year", names_prefix = "Discharges_", values_to = "YoY_Discharges")
nyc_medicare_2014_2017_topfive_df_longer2 <- pivot_longer(nyc_medicare_2014_2017_topfive_df_wider[,c(1:2,24,28,32)], cols = starts_with("Medicare_Payments_"),
names_to = "Medicare_Payments_Year", names_prefix = "Medicare_Payments_", values_to = "YoY_Medicare_Payments")
nyc_medicare_2014_2017_topfive_df_longer3 <- pivot_longer(nyc_medicare_2014_2017_topfive_df_wider[,c(1:2,25,29,33)], cols = starts_with("Total_Payments_"),
names_to = "Total_Payments_Year", names_prefix = "Total_Payments_", values_to = "YoY_Total_Payments")
nyc_medicare_2014_2017_topfive_df_longer4 <- pivot_longer(nyc_medicare_2014_2017_topfive_df_wider[,c(1:2,26,30,34)], cols = starts_with("Non_Medicare_Payments"), names_to = "Non_Medicare_Payments_", names_prefix = "Non_Medicare_Payments_", values_to = "YoY_Non_Medicare_Payments")
nyc_medicare_2014_2017_topfive_df_temp <- inner_join(nyc_medicare_2014_2017_topfive_df_longer, nyc_medicare_2014_2017_topfive_df_longer2, by = c("Provider Name", "DRG_num", "Discharge_Year" = "Medicare_Payments_Year"))
nyc_medicare_2014_2017_topfive_df_temp <- inner_join(nyc_medicare_2014_2017_topfive_df_temp, nyc_medicare_2014_2017_topfive_df_longer3, by = c("Provider Name", "DRG_num", "Discharge_Year" = "Total_Payments_Year"))
nyc_medicare_2014_2017_topfive_df_temp <- inner_join(nyc_medicare_2014_2017_topfive_df_temp, nyc_medicare_2014_2017_topfive_df_longer4, by = c("Provider Name", "DRG_num", "Discharge_Year" = "Non_Medicare_Payments_"))
provider_systems <- nyc_medicare_2017 %>%
select(`Provider Name`, system) %>%
unique()
nyc_medicare_2014_2017_topfive_df_timeseries <- nyc_medicare_2014_2017_topfive_df_temp %>%
pivot_longer(., cols = starts_with("YoY_"), names_to = "Variable", names_prefix = "YoY_", values_to = "YoY_Change") %>%
mutate(Discharge_Year = lubridate::year(as.Date(Discharge_Year,format = "%Y")))
nyc_medicare_2014_2017_topfive_df_timeseries <- left_join(nyc_medicare_2014_2017_topfive_df_timeseries, provider_systems, by = "Provider Name")
nyc_medicare_2014_2017_topfive_independent_df_timeseries <- nyc_medicare_2014_2017_topfive_df_timeseries %>%
filter(system == "Others")
ggplot(nyc_medicare_2014_2017_topfive_df_timeseries, aes(x = Discharge_Year, y = YoY_Change, group = system, color = system)) +
geom_point() +
geom_path(alpha = 0.5) +
geom_hline(yintercept = 0, color = "red") +
facet_grid(DRG_num ~ Variable)
## Warning: Removed 152 rows containing missing values (geom_point).
write.csv(nyc_medicare_2014_2017_topfive_df_timeseries, paste(path, medicare_folder, "nyc_medicare_2014_2017_topfive_timeseries.csv", sep = ""))
# name changes
## lutheran medical center > nyu langone hospitals
## new york methodist hospital > new york - presbyterian brooklyn methodist hospital
## nyu hospital centers > nyu langone hospitals
## forest hills hospital > long island jewish medical center
## beth israel medical center > mount sinai beth israel
## st luke's roosevelt hospital > mount sinai west
## new york hospital medical center of queens > new york - presbyterian/queens
## mount sinai beth israel/petrie > mount sinai beth israel
nyc_medicare_2014_2017_topfive_system_df <- nyc_medicare_2014_2017_topfive_df %>%
filter(system != "Others") %>%
mutate(Total_Payments = `Average Total Payments`*`Total Discharges`,
Total_Medicare_Payments = `Average Medicare Payments`*`Total Discharges`,
Total_Covered_Charges = `Average Covered Charges`*`Total Discharges`,
Total_Non_Medicare_Payments = Average_Non_Medicare_Payments*`Total Discharges`) %>%
group_by(system, DRG_num, Year) %>%
summarize(`Total Discharges` = ifelse(n()==2,sum(`Total Discharges`), `Total Discharges`),
`Average Total Payments` = sum(Total_Payments)/`Total Discharges`,
`Average Medicare Payments`= sum(Total_Medicare_Payments)/`Total Discharges`,
`Average Covered Charges` = sum(Total_Covered_Charges)/`Total Discharges`,
Average_Non_Medicare_Payments = sum(Total_Non_Medicare_Payments)/`Total Discharges`)
nyc_medicare_2014_2017_topfive_system_df_wider <- nyc_medicare_2014_2017_topfive_system_df %>%
pivot_wider(., names_from = Year, values_from = c(`Total Discharges`:Average_Non_Medicare_Payments))
nyc_medicare_2014_2017_topfive_system_df_wider <- nyc_medicare_2014_2017_topfive_system_df_wider %>%
mutate(Discharges_2015 = round((`Total Discharges_2015` - `Total Discharges_2014`)/`Total Discharges_2014`*100, 1),
Medicare_Payments_2015 = round((`Average Medicare Payments_2015` - `Average Medicare Payments_2014`)/`Average Medicare Payments_2014`*100, 1),
Total_Payments_2015 = round((`Average Total Payments_2015` - `Average Total Payments_2014`)/`Average Total Payments_2014`*100, 1),
Non_Medicare_Payments_2015 = round((`Average_Non_Medicare_Payments_2015` - `Average_Non_Medicare_Payments_2014`)/`Average_Non_Medicare_Payments_2014`*100, 1),
Discharges_2016 = round((`Total Discharges_2016` - `Total Discharges_2015`)/`Total Discharges_2015`*100, 1),
Medicare_Payments_2016 = round((`Average Medicare Payments_2016` - `Average Medicare Payments_2015`)/`Average Medicare Payments_2015`*100, 1),
Total_Payments_2016 = round((`Average Total Payments_2016` - `Average Total Payments_2015`)/`Average Total Payments_2015`*100, 1),
Non_Medicare_Payments_2016 = round((`Average_Non_Medicare_Payments_2016` - `Average_Non_Medicare_Payments_2015`)/`Average_Non_Medicare_Payments_2015`*100, 1),
Discharges_2017 = round((`Total Discharges_2017` - `Total Discharges_2016`)/`Total Discharges_2016`*100, 1),
Medicare_Payments_2017 = round((`Average Medicare Payments_2017` - `Average Medicare Payments_2016`)/`Average Medicare Payments_2016`*100, 1),
Total_Payments_2017 = round((`Average Total Payments_2017` - `Average Total Payments_2016`)/`Average Total Payments_2016`*100, 1),
Non_Medicare_Payments_2017 = round((`Average_Non_Medicare_Payments_2017` - `Average_Non_Medicare_Payments_2016`)/`Average_Non_Medicare_Payments_2016`*100, 1))
nyc_medicare_2014_2017_topfive_system_df_longer <- pivot_longer(nyc_medicare_2014_2017_topfive_system_df_wider[,c(1:2,23,27,31)], cols = starts_with("Discharges_"),
names_to = "Discharge_Year", names_prefix = "Discharges_", values_to = "YoY_Discharges")
nyc_medicare_2014_2017_topfive_system_df_longer2 <- pivot_longer(nyc_medicare_2014_2017_topfive_system_df_wider[,c(1:2,24,28,32)], cols = starts_with("Medicare_Payments_"),
names_to = "Medicare_Payments_Year", names_prefix = "Medicare_Payments_", values_to = "YoY_Medicare_Payments")
nyc_medicare_2014_2017_topfive_system_df_longer3 <- pivot_longer(nyc_medicare_2014_2017_topfive_system_df_wider[,c(1:2,25,29,33)], cols = starts_with("Total_Payments_"),
names_to = "Total_Payments_Year", names_prefix = "Total_Payments_", values_to = "YoY_Total_Payments")
nyc_medicare_2014_2017_topfive_system_df_longer4 <- pivot_longer(nyc_medicare_2014_2017_topfive_system_df_wider[,c(1:2,26,30,34)], cols = starts_with("Non_Medicare_Payments"), names_to = "Non_Medicare_Payments_", names_prefix = "Non_Medicare_Payments_", values_to = "YoY_Non_Medicare_Payments")
nyc_medicare_2014_2017_topfive_df_temp <- inner_join(nyc_medicare_2014_2017_topfive_system_df_longer, nyc_medicare_2014_2017_topfive_system_df_longer2, by = c("system", "DRG_num", "Discharge_Year" = "Medicare_Payments_Year"))
nyc_medicare_2014_2017_topfive_df_temp <- inner_join(nyc_medicare_2014_2017_topfive_df_temp, nyc_medicare_2014_2017_topfive_system_df_longer3, by = c("system", "DRG_num", "Discharge_Year" = "Total_Payments_Year"))
nyc_medicare_2014_2017_topfive_df_temp <- inner_join(nyc_medicare_2014_2017_topfive_df_temp, nyc_medicare_2014_2017_topfive_system_df_longer4, by = c("system", "DRG_num", "Discharge_Year" = "Non_Medicare_Payments_"))
nyc_medicare_2014_2017_topfive_system_df_timeseries <- nyc_medicare_2014_2017_topfive_df_temp %>%
pivot_longer(., cols = starts_with("YoY_"), names_to = "Variable", names_prefix = "YoY_", values_to = "YoY_Change") %>%
mutate(Discharge_Year = lubridate::year(as.Date(Discharge_Year,format = "%Y")))
nyc_medicare_2014_2017_topfive_independent_df_timeseries <- nyc_medicare_2014_2017_topfive_independent_df_timeseries[,1:5]
colnames(nyc_medicare_2014_2017_topfive_independent_df_timeseries)[1] <- "system"
nyc_medicare_2014_2017_topfive_allsystem_df_timeseries <- bind_rows(nyc_medicare_2014_2017_topfive_system_df_timeseries, nyc_medicare_2014_2017_topfive_independent_df_timeseries)
nyc_medicare_2014_2017_topfive_allsystem_df_timeseries_wider <- pivot_wider(nyc_medicare_2014_2017_topfive_allsystem_df_timeseries, values_from = YoY_Change, names_from = DRG_num:Variable)
nyc_medicare_2014_2017_topfive_allsystem_df_timeseries_wider <- nyc_medicare_2014_2017_topfive_allsystem_df_timeseries_wider %>%
arrange(desc(system))
ggplot(nyc_medicare_2014_2017_topfive_allsystem_df_timeseries, aes(x = Discharge_Year, y = YoY_Change, group = system, color = system)) +
geom_point() +
geom_path(alpha = 0.5) +
geom_hline(yintercept = 0, color = "red") +
facet_grid(DRG_num ~ Variable) +
coord_cartesian(ylim = c(-100,100)) +
labs(subtitle = "Coords zoomed to (-100:100, 2015:2017) to eliminate anomalies")
## Warning: Removed 50 rows containing missing values (geom_point).
write.csv(nyc_medicare_2014_2017_topfive_allsystem_df_timeseries, paste(path, medicare_folder, "nyc_medicare_2014_2017_topfive_allsystem_timeseries.csv", sep = ""))